{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "4921c412",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index import (\n",
    "    SummaryIndex,\n",
    "    SimpleDirectoryReader,\n",
    "    LLMPredictor,\n",
    "    ServiceContext,\n",
    ")\n",
    "from llama_index.response.notebook_utils import display_response\n",
    "from llama_index.llms import OpenAI\n",
    "from IPython.display import Markdown, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "261d923e",
   "metadata": {},
   "outputs": [],
   "source": [
    "documents = SimpleDirectoryReader(\"data\").load_data()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "f23b5169",
   "metadata": {},
   "source": [
    "# davinci-003"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "0c635cdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OpenAI(temperature=0, model=\"text-davinci-003\")\n",
    "service_context = ServiceContext.from_defaults(llm=llm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "b8ad1a2a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "davinci_index = SummaryIndex.from_documents(documents, service_context=service_context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "c9925597",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Document is split into 6 nodes.'"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f\"Document is split into {len(davinci_index._index_struct.nodes)} nodes.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "fa1d7242",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.indices.common.tree.base:> Building index from nodes: 5 chunks\n",
      "INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 19882 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "query_engine = davinci_index.as_query_engine(response_mode=\"tree_summarize\")\n",
    "response = query_engine.query(\n",
    "    \"What happened on one night in October 2003?\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "d758bdb7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "**`Final Response:`** It is not possible to answer this question with the given context information."
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 1/6`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** What I Worked On\n",
       "\n",
       "February 2021\n",
       "\n",
       "Before college the two main things I worked on, outside of schoo...<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 2/6`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** whereby the students wouldn't require the faculty to teach anything, and in return the faculty wo...<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 3/6`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** fact that our software worked via the web, and we got $10,000 in seed funding from Idelle's husba...<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 4/6`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** project was the new Lisp, whose parentheses I now wouldn't even have to hide. A lot of Lisp hacke...<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 5/6`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** chance it had to do with HN, and a 40% chance it had do with everything else combined. [17]\n",
       "\n",
       "As w...<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 6/6`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** and some people dislike being told such things.\n",
       "\n",
       "[11] People put plenty of stuff on the internet ...<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display_response(response)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "3f843a73",
   "metadata": {},
   "source": [
    "# gpt-4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "0849d860",
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OpenAI(temperature=0, model=\"gpt-4\")\n",
    "service_context = ServiceContext.from_defaults(llm=llm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "bb9eff4a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "gpt4_index = SummaryIndex.from_documents(documents, service_context=service_context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "cb56a205",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Document is split into 3 nodes.'"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f\"Document is split into {len(gpt4_index._index_struct.nodes)} nodes.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "44dda700",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.indices.common.tree.base:> Building index from nodes: 2 chunks\n",
      "INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 18006 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "query_engine = gpt4_index.as_query_engine(response_mode=\"tree_summarize\")\n",
    "response = query_engine.query(\n",
    "    \"What happened on one night in October 2003?\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "42bd0984",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "**`Final Response:`** On one night in October 2003, there was a big party at Paul Graham's house, organized by his friend Maria Daniels. At this party, Paul met Jessica Livingston, who would later become his partner in starting Y Combinator. Additionally, Paul Graham had a conversation with his friend Robert Morris about starting a new kind of venture firm that would fund startups in batches, which eventually led to the creation of Y Combinator."
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 1/3`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** 0.740238551627948<br>**Text:** What I Worked On\n",
       "\n",
       "February 2021\n",
       "\n",
       "Before college the two main things I worked on, outside of schoo...<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 2/3`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** really good. He recommended Trevor Blackwell, which surprised me at first, because at that point ...<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 3/3`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** make nuclear reactors. But I kept at it, and in October 2013 he finally agreed. We decided he'd t...<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display_response(response)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "fd981e5e",
   "metadata": {},
   "source": [
    "# gpt-4-32k"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9d9f20a9",
   "metadata": {},
   "source": [
    "NOTE: not available yet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71137f57",
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OpenAI(temperature=0, model_name=\"gpt-4-32k\")\n",
    "service_context = ServiceContext.from_defaults(llm=llm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb619782",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index import VectorStoreIndex\n",
    "\n",
    "\n",
    "gpt4_32k_index = VectorStoreIndex.from_documents(\n",
    "    documents, service_context=service_context\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d417f6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "len(gpt4_32k_index._index_struct.nodes_dict)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
